from IPython.display import HTML
HTML('''
<style>
div.prompt {display:none}
div.input {display:none}
div.output_stderr {display:none}
</style>
<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
$('div.prompt').hide();
$('div.output_stderr').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"></form>''')
import csv
import numpy as np
import pandas as pd
from scipy import stats
import math
import io
import requests
from io import StringIO
from datetime import datetime
from IPython.display import display
%matplotlib inline
import matplotlib.pyplot as plt
df = pd.read_csv('musedb.csv',sep=',',error_bad_lines=False,warn_bad_lines=False)
Number of products sold
df["BILLNO"].count()
Number of Bills generated
df["BILLNO"].nunique()
Approximately average product selling frequency on each bill is....
df["BILLNO"].count()/df["BILLNO"].nunique()
import cufflinks as cf
cf.set_config_file(offline=True)
import plotly as py
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot,offline
import plotly.graph_objs as go
vl_reg=df["STOREID"].value_counts()
reg=pd.DataFrame({'labels':vl_reg.index.tolist(), 'values':vl_reg.values.tolist()})
res=reg[reg["values"]>6]
samp=res.iplot(kind='pie',labels='labels',values='values',title="No of product sold at different store")
brand=df["BRAND_DESC"].value_counts()
l=brand.sum()
reg=pd.DataFrame({'labels':brand.index.tolist(), 'values':brand.values.tolist()})
res=reg[reg["values"]>(l*0.008)]
samp=res.iplot(kind='pie',labels='labels',values='values',title="No of product sold of different brand")
df["UPDATE_STAMP"]=pd.to_datetime(df["UPDATE_STAMP"], format='%Y-%m-%d %H:%M')
df["up_mon"]=df["UPDATE_STAMP"].map(lambda x: x.strftime('%Y-%m'))
cat=df["CATEGORY_DESC"].value_counts()
l=float(cat.sum())
cat=cat[cat>(l*0.005)]
df_cat= df[(df["CATEGORY_DESC"]!="Others") & (df["CATEGORY_DESC"]!="OTHERS") & (df["CATEGORY_DESC"]!="Unknown")]
df_cat=df_cat[df_cat.CATEGORY_DESC.isin(cat.index.tolist())]
result = df_cat.pivot_table(index='CATEGORY_DESC', columns="up_mon", values="BILLNO",aggfunc="count")
iplot(result.iplot(kind='bar', barmode="stack", asFigure=True,
title="Count of diferent categories sold in each month",
yTitle="Count"))
sub=df["SUBCATEGORY_DESC"].value_counts()
l=float(sub.sum())
sub=sub[sub>(l*0.005)]
df_sub=df[df.SUBCATEGORY_DESC.isin(sub.index.tolist())]
result = df_sub.pivot_table(index='SUBCATEGORY_DESC', columns="up_mon", values="BILLNO",aggfunc="count")
iplot(result.iplot(kind='bar', barmode="stack", asFigure=True,
title="Count of diferent subcategories sold in each month",
yTitle="Count",
margin=dict(l=40,r=20,b=210,t=40)))
result = df.pivot_table( columns="up_mon", values="BILLNO",aggfunc="count")
iplot(result.T.iplot(kind='bar', barmode='stack', asFigure=True,
title="Frequency of product sold in each month",
xTitle="Month year",
yTitle="No of product"))
result = df_sub.pivot_table(index='STOREID', columns="up_mon", values="BILLNO",aggfunc="count")
iplot(result.T.iplot(kind='bar', barmode="stack", asFigure=True,
title="Most frequently used store in each month",
yTitle="Count"))
iplot(df.drop_duplicates(subset=["BILLNO"]).pivot_table(columns="up_mon", values="BILLNO",aggfunc="count").T.iplot(kind='bar', barmode="stack", asFigure=True,
title="No of bills generated on each month",
yTitle="Count of bills"))